# Analysis Functions ------------------------------------------------------

# Source the helper scripts (expected to provide Independence_1() and Severity(), both called below)
source(file.path(iPath, "Independence_Function_1.R"))
source(file.path(iPath, "Severity.R"))

#' Identify flood events for every HUC8 basin listed in a metadata file.
#'
#' For each basin the function reads the daily discharge series of every site
#' that has a "<Site_No>_Daily_Discharge.csv" file, flags local discharge
#' peaks, extracts peaks-over-threshold events (threshold = the site's Q2 from
#' the return-period table), passes them through `Independence_1()` per site
#' and per basin, attaches the output of `Severity()`, and finally maps every
#' site event onto the basin event whose time range it overlaps.
#'
#' Relies on objects that are NOT defined in this function and must exist in
#' the calling environment: `iPath_Out_Meta`, `iPath_Out_Daily`,
#' `Meta_SiteHUC8`, `Return_Period_HUC8`, `Independence_1()`, `Severity()`,
#' and `IRanges()` / `findOverlaps()` / `subjectHits()` / `reduce()`
#' (presumably from the IRanges package -- confirm it is attached, and that
#' `reduce` is not masked by another package).
#'
#' @param HUC8 File name (inside `iPath_Out_Meta`) of the basin metadata CSV.
#'   The columns read from it are `HUC8`, `Site_Count`, `Missing_Site_Count`,
#'   `Drainage_Area` and `Mean_Record`.
#' @param Start,End First and last day of the analysis window; anything
#'   `as.Date()` accepts.
#' @param FLAG `TRUE` to derive each site's temporal window from the median
#'   base-to-peak / peak-to-base times, `FALSE` to use `BTOP` / `PTOB`.
#' @param BTOP,PTOB Fixed base-to-peak and peak-to-base lengths used when
#'   `FLAG` is `FALSE`.
#'   NOTE(review): `FLAG`, `BTOP` and `PTOB` only feed the diagnostic
#'   `Temporal_Window` table; event bounds currently come straight from the
#'   threshold crossings (the window-based bounds are commented out).
#' @return A data frame with one row per site event, with columns `Event_ID`,
#'   `Site_Base`, `Site_Peak`, `Site_End`, `Peak_Discharge`, `Site_ID`,
#'   `Basin_Start`, `Basin_Peak`, `Basin_End`, `Peak_Discharge`, `Duration`,
#'   `HUC8`, `Impacted`, `Total_Discharge`, `Severity`, `HUC_Site_Count`
#'   (note that `Peak_Discharge` appears twice), or `NULL` when no basin
#'   produced any event.
Flood_Data_Analysis <- function(HUC8, Start, End, FLAG, BTOP, PTOB) {

  # Restore the leading zero that is lost when identifiers are read as
  # numbers: every id whose printed width is in `widths` gets a "0" prefix.
  # The result is always a character vector.
  pad_leading_zero <- function(ids, widths) {
    ids <- as.character(ids)
    short <- which(nchar(ids) %in% widths)
    ids[short] <- paste0("0", ids[short])
    ids
  }

  # Metadata -----------------------------------------------------------------
  HUC_MetaFile <- read.csv(file.path(iPath_Out_Meta, HUC8),
                           header = TRUE, sep = ",")
  HUC_MetaFile$HUC8 <- pad_leading_zero(HUC_MetaFile$HUC8, 7)

  Site_MetaFile <- read.csv(file.path(iPath_Out_Meta, Meta_SiteHUC8),
                            header = TRUE, sep = ",")
  Site_MetaFile <- Site_MetaFile[order(Site_MetaFile$HUC8), ]
  Site_MetaFile$Site_No <- pad_leading_zero(Site_MetaFile$Site_No, c(7, 9))
  Site_MetaFile$HUC8 <- pad_leading_zero(Site_MetaFile$HUC8, 7)

  # Threshold (return period) values per site
  Return_Period <- read.csv(file.path(iPath_Out_Meta, Return_Period_HUC8),
                            header = TRUE, sep = ",")
  Return_Period$Site_No <- pad_leading_zero(Return_Period$Site_No, c(7, 9))

  # Day index for the analysis window ----------------------------------------
  # Built once: it does not depend on the basin or the site.
  Start <- as.Date(Start)
  End <- as.Date(End)
  if (is.na(Start) || is.na(End) || End < Start) {
    stop("`Start` and `End` must be valid dates with Start <= End.",
         call. = FALSE)
  }
  Day_Indicies <- data.frame(Date = seq.Date(Start, End, by = "day"))
  Day_Indicies$Date_Index <- seq_len(nrow(Day_Indicies))

  # Accumulators over all basins. Only All_Events is returned; the others
  # back the alternative return values listed at the end of the function.
  Basin_Frequency <- NULL
  Basin_Severity <- NULL
  Event_Summary <- NULL
  All_Events <- NULL
  Site_Events_List <- NULL

  for (b in seq_len(nrow(HUC_MetaFile))) {
    basin_id <- HUC_MetaFile$HUC8[b]
    print(basin_id)

    # Sites of this basin that actually have a daily discharge file.
    # which() drops sites whose HUC8 is NA instead of failing on them.
    basin_sites <- Site_MetaFile$Site_No[which(Site_MetaFile$HUC8 == basin_id)]
    if (length(basin_sites) > 0) {
      daily_files <- file.path(iPath_Out_Daily,
                               paste0(basin_sites, "_Daily_Discharge.csv"))
      basin_sites <- basin_sites[file.exists(daily_files)]
    }
    if (length(basin_sites) == 0) {
      message("No sites with daily discharge data for HUC8 ", basin_id,
              "; skipping.")
      next
    }

    Site_List <- data.frame(Site_ID = basin_sites)
    Site_List$Q2 <- Return_Period$Q2[match(Site_List$Site_ID,
                                           Return_Period$Site_No)]

    Temporal_Window <- NULL
    Basin_Events <- NULL
    All_Site_Events <- NULL

    for (i in seq_len(nrow(Site_List))) {
      site_id <- Site_List$Site_ID[i]
      q2 <- Site_List$Q2[i]

      site_data <- read.csv(file.path(iPath_Out_Daily,
                                      paste0(site_id, "_Daily_Discharge.csv")))
      site_data <- site_data[, -1] # drop the first column of the file
      site_data$Date <- as.Date(site_data$Date)

      # Restrict to the analysis window, then align to the day index so that
      # every day of the window is present.
      in_window <- which(site_data$Date >= Start & site_data$Date <= End)
      site_data <- site_data[in_window, , drop = FALSE]
      site_data <- merge(Day_Indicies, site_data, by = "Date", all.x = TRUE)

      # Days without an observation are treated as zero discharge
      site_data$Discharge_cms[is.na(site_data$Discharge_cms)] <- 0

      # Day-to-day change in discharge and its direction (1 / 0 / -1)
      site_data$DeltaQ <- c(0, diff(site_data$Discharge_cms))
      site_data$Score_Q <- sign(site_data$DeltaQ)

      # A day is a peak when discharge rose into it and starts falling on the
      # next day, or after a plateau of one or two days. The last three days
      # of the window are never evaluated and stay FALSE.
      n_days <- nrow(site_data)
      score <- site_data$Score_Q
      is_peak <- logical(n_days)
      if (n_days > 3) {
        k <- seq_len(n_days - 3)
        falls_after <- score[k + 1] == -1 |
          (score[k + 1] == 0 & score[k + 2] == -1) |
          (score[k + 1] == 0 & score[k + 2] == 0 & score[k + 3] == -1)
        is_peak[k] <- (score[k] == 1 & falls_after) %in% TRUE
      }
      site_data$Peak <- is_peak

      # Peaks over threshold (threshold = Q2). An event starts the day before
      # discharge first reaches Q2 and ends on the first day it is below Q2
      # again; its peak is the largest flagged peak collected since the last
      # recorded event. Sites without a Q2 value yield no events.
      Times <- data.frame()
      if (!is.na(q2)) {
        in_event <- FALSE
        peaks <- NULL # rows of (Date_Index, Discharge_cms)
        discharge <- site_data$Discharge_cms
        day_index <- site_data$Date_Index
        for (l in seq_len(n_days)) {
          if (!in_event && q2 <= discharge[l]) {
            in_event <- TRUE
            Base_Time <- day_index[l] - 1
          }
          if (is_peak[l] && discharge[l] >= q2) {
            peaks <- rbind(peaks, c(day_index[l], discharge[l]))
          }
          if (in_event && q2 > discharge[l]) {
            in_event <- FALSE
            End_Time <- day_index[l]
            Peak_IDX <- which.max(peaks[, 2])
            # An exceedance without any flagged peak is not recorded
            if (length(Peak_IDX) > 0) {
              Times <- rbind(
                Times,
                data.frame(Site_ID = site_id,
                           HUC8 = basin_id,
                           Base_Time = Base_Time,
                           Peak_Time = peaks[Peak_IDX, 1],
                           End_Time = End_Time,
                           Peak_Discharge = peaks[Peak_IDX, 2])
              )
              peaks <- NULL
            }
          }
        }
      }

      site_events <- Times
      has_site_events <- nrow(site_events) > 0

      if (has_site_events) {
        # Base-to-peak and peak-to-base lengths of every event
        site_events$BtoP <- site_events$Peak_Time - site_events$Base_Time
        site_events$PtoB <- site_events$End_Time - site_events$Peak_Time

        if (FLAG == TRUE) {
          Temporal_Window <- rbind(
            Temporal_Window,
            data.frame(BtoP = round(median(site_events$BtoP)),
                       PtoB = round(median(site_events$PtoB)))
          )
        }
        if (FLAG == FALSE) {
          Temporal_Window <- rbind(Temporal_Window,
                                   data.frame(BtoP = BTOP, PtoB = PTOB))
        }

        # Event bounds are the observed threshold crossings. The alternative
        # (Peak_Time -/+ Temporal_Window) is intentionally not applied.
        site_events$Event_Base <- site_events$Base_Time
        site_events$Event_Peak <- site_events$Peak_Time
        site_events$Event_End <- site_events$End_Time
      } else {
        Temporal_Window <- rbind(Temporal_Window,
                                 data.frame(BtoP = 0, PtoB = 0))
      }

      All_Site_Events <- rbind(All_Site_Events, site_events)

      if (has_site_events) {
        site_independence <- Independence_1(site_events)
        site_independence$Site_ID <- site_id
        # Express the peak as a multiple of the site's Q2
        site_independence$Peak_Discharge <-
          site_independence$Peak_Discharge / q2

        Basin_Events <- rbind(Basin_Events, site_independence)
        Basin_Events <- Basin_Events[order(Basin_Events$Event_Base,
                                           Basin_Events$Event_End), ]
      } else {
        # Keeps Basin_Events a (possibly empty) data frame
        Basin_Events <- rbind(Basin_Events, data.frame())
      }
    }

    # Basin-level summary ----------------------------------------------------
    # Defaults describe a basin in which no site produced an event.
    Basin_Specific_Freq <- 0L
    Mu <- 0
    Med <- 0
    Max <- 0
    Min <- 0
    Min_D <- 0
    Mean_D <- 0
    Max_D <- 0

    if (!is.null(Basin_Events) && nrow(Basin_Events) > 0) {
      if (nrow(Basin_Events) > 1) {
        # More than one site event: merge them into independent basin events
        Basin_Independence <- Independence_1(Basin_Events)
      } else {
        # A single site event is already independent
        Basin_Independence <- Basin_Events[, -5]
      }

      n_events <- nrow(Basin_Independence)
      Basin_Independence$Event_Duration <-
        Basin_Independence$Event_End - Basin_Independence$Event_Base

      # Gap between the end of one basin event and the start of the next.
      # The trailing NA (no successor for the last event) is removed by
      # na.rm below; a single event has an inter-event time of 0.
      if (n_events > 1) {
        Interevent <- c(Basin_Independence$Event_Base[-1] -
                          Basin_Independence$Event_End[-n_events], NA)
      } else {
        Interevent <- 0
      }
      Mu <- mean(Interevent, na.rm = TRUE)
      Med <- median(Interevent, na.rm = TRUE)
      Max <- max(Interevent, na.rm = TRUE)
      Min <- min(Interevent, na.rm = TRUE)

      if (n_events > 0) {
        Min_D <- min(Basin_Independence$Event_Duration, na.rm = TRUE)
        Mean_D <- mean(Basin_Independence$Event_Duration, na.rm = TRUE)
        Max_D <- max(Basin_Independence$Event_Duration, na.rm = TRUE)
        Basin_Independence$HUC8 <- basin_id
      }

      Basin_Severity <- Severity(Basin_Events, Site_List)
      Basin_Independence <- cbind(Basin_Independence, Basin_Severity,
                                  Site_Count = HUC_MetaFile$Site_Count[b])

      # Group site events whose time ranges overlap; the group id is the key
      # used to attach the basin-event attributes to every site event.
      Overlap <- IRanges((Basin_Events$Event_Base), Basin_Events$Event_End)
      Basin_Events$Group <- subjectHits(findOverlaps(Overlap, reduce(Overlap)))

      Basin_Events <- merge(Basin_Events, Basin_Independence,
                            by = "Group", all.x = TRUE)
      # NOTE(review): "Peak_Discharge" is assigned to two columns (the site
      # value and the basin value); kept as-is so the output is unchanged.
      colnames(Basin_Events) <- c("Event_ID", "Site_Base", "Site_Peak",
                                  "Site_End", "Peak_Discharge", "Site_ID",
                                  "Basin_Start", "Basin_Peak", "Basin_End",
                                  "Peak_Discharge", "Duration", "HUC8",
                                  "Impacted", "Total_Discharge", "Severity",
                                  "HUC_Site_Count")

      All_Events <- rbind(All_Events, Basin_Events)
      Event_Summary <- rbind(Event_Summary, Basin_Independence)

      # Flood event frequency count for this HUC8
      Basin_Specific_Freq <- nrow(Basin_Independence)
    }

    Site_Events_List <- rbind(Site_Events_List, All_Site_Events)

    Basin_Frequency <- rbind(
      Basin_Frequency,
      data.frame(HUC8 = basin_id,
                 Site_Count = HUC_MetaFile$Site_Count[b],
                 Missing_Daily = HUC_MetaFile$Missing_Site_Count[b],
                 Catchment_Area = HUC_MetaFile$Drainage_Area[b],
                 Years_Record = HUC_MetaFile$Mean_Record[b],
                 Frequency = Basin_Specific_Freq,
                 Inter_Mean = Mu, Inter_Median = Med,
                 Inter_Max = Max, Inter_Min = Min,
                 Duration_Min = Min_D, Duration_Mean = Mean_D,
                 Duration_Max = Max_D)
    )

    # Debugging aids:
    #print(Site_List)
    #print(Temporal_Window)
    #print(Basin_Events)
    #print(Basin_Specific_Freq)
  }

  #return(Basin_Frequency) #Event Summary Statistics for each HUC8
  #return(Event_Summary)  #All of the Aggregated events within a HUC8
  #return(Site_Events_List) #All of the Individual Site Events
  return(All_Events) #All of the Events that Aggregate to a single Basin Event
}
